%%% verslag.tex ---
%% Author: xtroce@museum
%% Version: $Id: mitschrift03.tex,v 0.0 2012/02/13 12:22:49  Exp$

\documentclass[12pt,a4paper]{article}
\special{papersize=210mm,297mm}

%% use a better geometry for A4 paper
\usepackage[a4paper,top=3cm,bottom=3cm,left=3cm,right=3cm]{geometry}

%% package for importing other latex files to this one
\usepackage{import}

%% package to use graphics in latex
\usepackage[dvipdf]{graphicx}

%%package for drawing IPA sound font
\usepackage{tipa}

%%package for multiple figures in one figure
\usepackage{subfigure}

%% package for multiple rows in a table
\usepackage{multirow}

%% package to include matlab code into latex
%% to include use \lstinputlisting{MATLABFILE}
%% to include a whole file
%\usepackage{mcode}

%% Package to colorize table output
%\usepackage{colortbl}

%% To write Umlaute in latex without encoding
%\usepackage[ansinew]{inputenc}

%% german language package
%\usepackage{ngerman}

%\usepackage[debugshow,final]{graphics}
%\usepackage{setspace}

%% for new packages use the ~/texmf/tex folder
%% run texhash afterwards



\title{Comparison of Robust and Linear Methods for Formant Detection in
  PRAAT}
\author{Sebastian Dr\"oppelmann\\ Osewa Redencio Jozefzoon}
\date{\today}

%\doublespacing
\bibliographystyle{plain}%Choose a bibliographic style

\begin{document}

%%Titlepage
\begin{titlepage}
\maketitle
\begin{center}
% \subimport{/home/xtroce/.emacs/}
%\includegraphics[scale=0.2]{logo_uva.ps}
\end{center}
\thispagestyle{empty}
\end{titlepage}

%%TOC
\tableofcontents
\newpage

%%%%##########################################################################

\section{Introduction}
In the paper \emph{On Robust Linear Prediction of Speech}, Chin-Hee Lui claims
that the Robust Linear Prediction (RLP) algorithm achieves more
accurate results than the conventional Linear Prediction (LP)
procedures.  Chin-Hee Lui states that this accuracy is achieved by the
manner in which the RLP, in contrast to the conventional LP,
processes the weight of the prediction residuals.  The RLP provides a
less biased estimate for the prediction coefficient with less
variance.

This paper will examine this claim by contrasting the performance
of the LP and the RLP.  The tool for this examination
will be a sound analyzing program called PRAAT, in which the LP and
the RLP will be used to analyze synthetically generated and real
voices.


\section{Synthesized Vowels}
\subsection{Introduction}
The idea of measuring the performance of the two algorithms for
formant analysis was to compare the performance of the detection of
known generated vowels and the difference of the two algorithms
considering the frequencies used for generation. With this method we
hoped to get some insight into how well the two different algorithms
perform, provided we know the frequencies of formants 1 and 2 (further on
F1 and F2).

\subsection{Method}
For this comparison we used the Pols \& Van Nierop (1973) table, in
which the first three formants of 50 Dutch males and 25 Dutch females
were measured. This table is provided by PRAAT and we used it to
generate a standard female and male voice. This was done by taking the
mean of the male and female voices to represent the standard of each
sex. The input values for generation can be seen in
Table~\ref{tab:inputgeneration}. For the 4th and 5th formant we used the
fixed values of 3500 and 4500 Hz. Then we varied the source's
frequency, the bandwidth and the maximum frequency. Furthermore, we
used two types of generation for the vowels. One was the pulse train
method, the other the phonation method, which should generate more
realistic vowels. We then fixed some of the varying values to see the
performance of the algorithms according to the remaining varying
values.\\
For the varying maximum frequency and the varying source
frequency we used a bandwidth of 11\% of the pitch frequency. To
measure the bandwidth we used 100 Hz as pitch frequency for males and
200 Hz for females. For the maximum frequency we chose
4500 Hz for males and 5500 Hz for females. We did all these
measurements for both creation methods.

%% import of the input table
\subimport{scripts/tables/}{pvn.tex}

\subsection{Results}
For the generated vowels the overall performance of the robust
function was better than the performance of the Burg
function. Especially the results with the phonetic generation of the
values show that the performance of the robust function is, indeed,
more robust. The varying pitch frequencies for males and females can
be seen in Figures~\ref{fig:varsourceT0} and~\ref{fig:varsourceT1} for
the pulse train and the phonetic generation.  As you can see in the
figures, the robust function, which is drawn in green, is much closer to
the originally generated vowels than the Burg function measurements
drawn in red. You can also see in Figure~\ref{fig:varsourceT1} that
with the phonetic generation method the Burg method
produces F2 values which are far lower than they are supposed to be.
This is also true for the results with varying maximum frequencies
shown in Figures~\ref{fig:varmaxT0} and~\ref{fig:varmaxT1}.\\
The lower F2 values are caused by the fact that the Burg function
sometimes measures the important formants inaccurately and reports
a formant with a very high bandwidth as the F2 formant. In effect it
takes an unimportant, very wide peak and uses it for the F2 value,
thereby moving the anticipated F2 measurements to the F3
measurements. This could be corrected by using an error evaluation,
deleting the formants with a high error and moving the later formants
up. A real implementation of that would not be appropriate to discuss
in this paper, since it would be a separate project.

\subimport{scripts/pics/}{pics.tex}

\section{Real Speech}
\subsection{Introduction}
The idea in this section was to also measure the performance of the
two algorithms with real speech. As it is impossible to know the exact
formant frequency of spoken language, we used recordings of spoken
vowel-consonant-vowel pairs where the recording was done with two
different microphones. One of the microphones was a high quality one,
the other a low quality table microphone. Through comparison we hoped
to get some insight into the performance of the two algorithms. The
idea was to analyze both recordings with both methods and calculate
the distance of the measured formants from each other for both
methods. Since the recording was done in parallel, the formants should
not differ, since it is the same voice with the same intonation of the
vowel.

\subsection{Method}
We compared the performance by calculating an error value that
expresses the difference between the recordings of the two
microphones. To do this we used a logarithmic error function to
compensate for the higher loss of the higher formants. At the end we
multiplied all results by 10 to scale them up and make them more readable.\\
The functions we used can be seen in Figure~\ref{fig:errorval}. For comparison
we used the function in Figure~\ref{subfig:oneerror} for F1 and F2
individually, the function in Figure~\ref{subfig:twoerror} for the
combined error of F1 and F2, and the function in
Figure~\ref{subfig:threeerror} for F1, F2 and F3 together.

\begin{figure}
\subfigure[All Values] {
$\displaystyle \sqrt{(\log_{10}(F1_{\mathrm{fm}}) - \log_{10}(F1_{\mathrm{hm}}))^2 + (\log_{10}(F2_{\mathrm{fm}}) - \log_{10}(F2_{\mathrm{hm}}))^2 + (\log_{10}(F3_{\mathrm{fm}}) - \log_{10}(F3_{\mathrm{hm}}))^2}$
\label{subfig:threeerror}
}
\subfigure[First two formants] {
$\displaystyle \sqrt{(\log_{10}(F1_{\mathrm{fm}}) - \log_{10}(F1_{\mathrm{hm}}))^2 + (\log_{10}(F2_{\mathrm{fm}}) - \log_{10}(F2_{\mathrm{hm}}))^2}$
\label{subfig:twoerror}
}
\subfigure[One Formant] {
$\displaystyle |\log_{10}(F1_{\mathrm{fm}}) - \log_{10}(F1_{\mathrm{hm}})|$
\label{subfig:oneerror}
}
\caption{Error Function}
\label{fig:errorval}
\end{figure}

\subsection{Results}
\subsubsection{Distance of the measurements}
When comparing the distance of the two recordings, the results showed
that the robust method is slightly worse with the 20, 28 and 60 year
old women, but has better results for the 40 year old woman, as seen in
Tables~\ref{fig:F20}, \ref{fig:F28}, \ref{fig:F40} and~\ref{fig:F60}. On
some vowels the robust method performs better, but not overall when
considering the distance of the two measurements.

%% realvoice measurement error values
\subimport{scripts/tables/}{endresults_realvoice.tex}

\subsubsection{Statistical comparison}
When calculating the number of times in which one method achieves the
smallest difference between the outcome for the two microphones and
dividing that number by the number of measurements, it becomes possible
to compare the achievements of the two methods. In contrast to our
belief that the Robust method would achieve better results than the
Burg method, we find that the Burg method achieves better results for
the female voices. In fact, in 54 percent of the cases the Burg method
is more effective for the female voices, whereas the Robust method is
more effective in 66 percent of the male voices.  More specifically,
for the prediction of the vowels a and o the Burg method appears to
predict more accurately, i.e.\ has a smaller difference between the
prediction of the vowels for the two different microphones, and the
Robust method is more accurate for the i and u (in case of the female
voices).  In case of the male voices, for the prediction of the vowels
a and i the Robust method
appears to predict more accurately and the Robust method is as
effective as the Burg method for the prediction of the u.

The average difference for each method is calculated by summing up the
biggest and smallest difference between the measurements and dividing
that by two. The Significance measure is calculated by randomly
trying to achieve the average difference.

\begin{figure}
\subfigure[Female Age 20] {
  \begin{tabular}{|l|l|l|l|l|}
    \hline
    Vowel & Burg & Significance & Robust & Significance\\
    \hline
    a& -32.632173&2.850367e-10&-43.057320&8.208838e-12\\
    \hline
    i&-26.112238&2.216405e-11&-5.750078&0.060944\\
    \hline
    u&-73.766497&7.873487e-24&-62.154746&7.582754e-21\\
    \hline
  \end{tabular}
\label{fig:F20 AvDiv}
}
\subfigure[Female Age 28] {
  \begin{tabular}{|l|l|l|l|l|}
    \hline
Vowel & Burg & Significance & Robust & Significance\\
    \hline
    a&0.259633&0.482996&4.617262&0.214917\\
    \hline
    i&-36.855000&2.7085286e-06&-20.455038&0.006420\\
    \hline
    u&-30.943822&2.266579e-10&-24.632184&5.758738e-06\\
    \hline
  \end{tabular}
\label{fig:F28 AvDiv}
}
\subfigure[Female Age 40] {
  \begin{tabular}{|l|l|l|l|l|}
    \hline
Vowel & Burg & Significance & Robust & Significance\\
    \hline
    a&-0.348412&0.412638&8.217084&1.705266e-06\\
    \hline
    i&-38.172687&8.024843e-12&-14.910756&0.004903\\
    \hline
    u&-47.498508&8.511210e-22&-38.844465&1.362751e-14\\
    \hline
  \end{tabular}
\label{fig:F40 AvDiv}
}
\subfigure[Female Age 60] {
  \begin{tabular}{|l|l|l|l|l|}
    \hline
Vowel & Burg & Significance & Robust & Significance\\
    \hline
    a&-31.666358&1.813891e-13&-39.748499&1.503235e-17\\
    \hline
    i&8.378841&0.064797&26.321407&6.667037e-07\\
    \hline
    o&-8.059247&0.210739&-23.939256&0.209040\\
    \hline
    u&-27.228002&2.429825e-08&-32.316862&2.636188e-08\\
    \hline

  \end{tabular}
  \label{fig:F60 AvDiv}
}

\subfigure[Male Age 15] {
  \begin{tabular}{|l|l|l|l|l|}
    \hline
Vowel & Burg & Significance & Robust & Significance\\
    \hline
    a&-53.692350&1.053716e-15&-45.678181&1.462322e-12\\
    \hline
    i&-55.518327&2.075646e-12&-46.620892&1.313626e-09\\
    \hline
    u&-62.212697&1.039393e-21&-63.673076&2.789465e-22\\
    \hline
  \end{tabular}
\label{fig:M15 AvDiv}
}
\subfigure[Male Age 40] {
  \begin{tabular}{|l|l|l|l|l|}
    \hline
Vowel & Burg & Significance & Robust & Significance\\
    \hline
    a&-30.502337&1.159208e-20&-11.695415&0.000152\\
    \hline
    i&-32.839408&8.327381e-08&-29.837337&5.986142e-06\\
    \hline
    u&-36.643033&4.397312e-14&-35.680238&6.052054e-14\\
    \hline
  \end{tabular}
\label{fig:M40 AvDiv}
}
\subfigure[Male Age 56] {
  \begin{tabular}{|l|l|l|l|l|}
    \hline
Vowel & Burg & Significance & Robust & Significance\\
    \hline
    a&-75.323560&1.990057e-18&-76.515917&2.292241e-19\\
    \hline
    i&-36.371798&7.770873e-09&-28.496676&2.227712e-08\\
    \hline
    u&-68.330552&6.472055e-25&-65.192702&2.099214e-21\\
    \hline
  \end{tabular}
\label{fig:M56 AvDiv}
}
\subfigure[Male Age 66] {
  \begin{tabular}{|l|l|l|l|l|}
    \hline
Vowel & Burg & Significance & Robust & Significance\\
    \hline
    a&-33.737726&1.157551e-13&-31.301528&4.616446e-12\\
    \hline
    i&-40.800060&2.047511e-10&-15.490194&0.001621\\
    \hline
    u&-33.258028&4.446857e-22&-41.623565&2.089594e-26\\
    \hline
  \end{tabular}
\label{fig:M66 AvDiv}
}
\caption{Average difference between measurements of two different microphones.}
\label{fig:rv_statistics}
\end{figure}

%%%%##########################################################################
\bibliography{verslag}
\end{document}
